
	
***************************************
* (1.0) Clean up the raw data
***************************************
		
	***************************************
	* (1.1) Clean up the 2PK data
	***************************************
	* Load the raw 2PK file (its path is held in the global f_2PK) and retain
	* only the questionnaire items that the rest of this script works with.
	use "$f_2PK", clear

	keep	prov a1 a2 hh birthseq mbirth					///
			b1 b3 b4y b4m b5 b6 b8y b8m b9 b10y b10m		///
			b11 b12 b13 b14 b15y b15m						///
			e1 e3 e4										///
			g0 g1 g2 g3y g3m g4 g5 g6 g7 g8y g8m			///
			j1

	* Give the coded item names readable names. The renames are independent
	* of each other, so they are grouped by questionnaire series.

	* --- b-series items ---
	rename b1	relate
	rename b3	nationality
	rename b4y	m_year_birth
	rename b4m	m_month_birth
	rename b5	m_place_birth
	rename b6	household_reg
	rename b8y	m_year_reside
	rename b8m	m_month_reside
	rename b9	reason_migration
	rename b10y	year_migration
	rename b10m	month_migration
	rename b11	orig_prov
	rename b12	educ
	rename b13	employ
	rename b14	marital
	rename b15y	year_marriage
	rename b15m	month_marriage

	* --- e-series items ---
	rename e1	serialno
	rename e3	year_menarche
	rename e4	year_menopause

	* --- g-series items (one record per pregnancy) ---
	rename g0	serialnopreg
	rename g1	pregorder
	rename g2	pregresult
	rename g3y	pregyear
	rename g3m	pregmonth
	rename g4	birthorder
	rename g5	breastfeed_month
	rename g6	mense_again
	rename g7	birth_quota
	rename g8y	childdeath_year
	rename g8m	childdeath_month

	* --- j-series items ---
	rename j1	answer_status

	* ---- Identifiers ----
	* group numbers each distinct (prov, a1, a2) combination: the household.
	* mother adds serialno, because one household can hold several mothers
	* (e.g. a grandmother and a mother living together).
	egen group  = group(prov a1 a2)
	egen mother = group(prov a1 a2 serialno)
	label variable group  "Household ID"
	label variable mother "Mother ID"

	* ---- Breastfeeding ----
	* Raw code 0 means "less than 1 month" and is stored as half a month;
	* raw code 99 means "no breastfeeding" (per the codebook) and becomes 0.
	* Both recodes are applied in one pass so the new 0 is never turned into .5.
	replace breastfeed_month = cond(breastfeed_month==0, .5, 0) if inlist(breastfeed_month, 0, 99)
	label variable breastfeed_month "Months breastfed"

	gen b0 = breastfeed_month==0
	gen b1 = breastfeed_month==.5
	label define g5 0 "never", modify
	label variable b0 "Never breastfed"
	label variable b1 "Breastfed <1 month"

	* ---- Sex of the child ----
	* Only defined when pregresult is 1 or lower/2; code 1 is treated as male.
	gen cmale = (pregresult==1) if pregresult<=2
	label variable cmale "Male baby"

	* ---- Date the pregnancy ended ----
	* Years are stored as offsets from 1900; the day is set to the 1st.
	replace pregyear = 1900+pregyear
	gen pregfdate = mdy(pregmonth,1,pregyear)
	format pregfdate %dN/D/Y

	* ---- Restrict to Han women ----
	* han is 0/1 and never missing, so dropping han != 1 keeps exactly han == 1.
	gen han = nationality==1
	label variable han "Han"
	drop if han != 1

	* ---- Education in years ----
	* Codes 1-5 map to 16/12/9/6/3 years; every other value, including a
	* missing educ, is assigned 0.
	* NOTE(review): missing educ ending up as 0 years may be unintended - confirm.
	gen ed = cond(educ==1, 16, cond(educ==2, 12, cond(educ==3, 9, cond(educ==4, 6, cond(educ==5, 3, 0)))))
	label variable ed "Education (Continuous)"

	* ---- Mother's ages ----
	* The survey date is taken as 88 + 7/12 on the same two-digit year scale
	* as m_year_birth; both ages are rounded to whole years.
	gen motherage    = round((88+7/12-m_year_birth-m_month_birth/12))
	gen age_marriage = round((year_marriage+month_marriage/12-m_year_birth-m_month_birth/12))
	label variable motherage    "Mother's Age at Survey"
	label variable age_marriage "Mother's Age at Marriage"

	* ---- Child death ----
	* Put the death year on the four-digit scale, then compute age at death
	* in years; negative ages are blanked out.
	replace childdeath_year = childdeath_year+1900
	gen child_death_age = (childdeath_year+childdeath_month/12-pregyear-pregmonth/12)
	replace child_death_age = . if child_death_age<0
	label variable child_death_age "Age at death (child)"

	* ---- Infant mortality by year the pregnancy ended ----
	* Six tallies are attached to every record of a given pregyear:
	* births with known sex and deaths at age <= 1, overall and by sex.
	* Years outside 1941-1989 keep missing tallies.
	foreach tally in born died gborn gdied bborn bdied {
		gen `tally' = .
	}

	label variable born		"Total children born alive"
	label variable died		"Total children died before age 1"
	label variable gborn	"Total number of girls born alive"
	label variable gdied	"Total number of girls died before age 1"
	label variable bborn	"Total number of boys born alive"
	label variable bdied	"Total number of boys died before age 1"

	forvalues yr = 1941/1989 {

		* all children with a recorded sex
		quietly count if pregyear==`yr' & cmale!=.
		replace born = r(N) if pregyear==`yr'
		quietly count if child_death_age<=1 & pregyear==`yr' & cmale!=.
		replace died = r(N) if pregyear==`yr'

		* boys
		quietly count if pregyear==`yr' & cmale==1
		replace bborn = r(N) if pregyear==`yr'
		quietly count if child_death_age<=1 & pregyear==`yr' & cmale==1
		replace bdied = r(N) if pregyear==`yr'

		* girls
		quietly count if pregyear==`yr' & cmale==0
		replace gborn = r(N) if pregyear==`yr'
		quietly count if child_death_age<=1 & pregyear==`yr' & cmale==0
		replace gdied = r(N) if pregyear==`yr'
	}

	* Rates per 1,000 births
	gen IMR  = (died/born)*1000
	gen bIMR = (bdied/bborn)*1000
	gen gIMR = (gdied/gborn)*1000

	label variable IMR	"Infant mortality rate"
	label variable bIMR	"Infant mortality rate (boys)"
	label variable gIMR	"Infant mortality rate (girls)"

	* ---- Child died indicator ----
	* 1 when an age at death could be computed, 0 otherwise; only defined
	* for records with pregresult <= 2.
	gen child_died = child_death_age!=. if pregresult<=2
	label variable child_died "Child died"

	* ---- Child's age at the survey (1988 + 7/12) ----
	* Left missing for children who died. An earlier version of this code
	* (Paul's) used child_death_age for them instead.
	gen child_age = (1988+7/12-pregyear-pregmonth/12) if pregresult<=2 & child_died==0
	label variable child_age "Child age at survey"

	*get household registration status*
	* Attach the registration text (var1) from hh_reg.dta, keyed on household_reg.
	*   - The path uses a forward slash so it resolves on Windows, macOS and
	*     Linux; a backslash only works on Windows.
	*   - keep(master match) prevents registration codes that exist only in the
	*     lookup file from entering the data as otherwise-empty observations.
	*   - nogenerate replaces the separate drop of _merge.
	sort 		household_reg
	merge 		m:1 household_reg using "$d_data/hh_reg.dta", keep(master match) nogenerate

	* Registration dummies are read off the first word of var1.
	gen 		city	=word(var1,1)=="city"
	gen 		country	=word(var1,1)=="countryside"
	gen 		town	=word(var1,1)=="town"

	lab var		city 	"Urban registration" 
	lab var		country	"Rural registration" 
	lab var		town	"Town registration" 

	* String version of the same classification. Records that fall in none of
	* the three categories (this includes unmatched records, whose var1 is
	* empty) are labelled OTHER and then removed from the sample.
	gen 		str setting=""
	replace 	setting="CITY" if city==1
	replace 	setting="COUNTRY" if country==1
	replace 	setting="TOWN" if town==1
	replace 	setting="OTHER" if city==0 & country==0 & town==0
	drop 		var1
	drop 		if setting=="OTHER"

	lab var		setting	"Registration (string)"

	* ---- Pregnancy outcome dummies ----
	* pregresult codes used here: 3 = stillbirth, 5 = miscarriage, 6 = abortion.
	gen stillbirth  = (pregresult==3)
	gen abortion    = (pregresult==6)
	gen miscarriage = (pregresult==5)

	label variable stillbirth	"Stillborn"
	label variable abortion		"Abortion"
	label variable miscarriage	"Miscarriage (spontaneous abortion)"

	* ---- Durations, all in years ----
	* pregyear is already on the four-digit scale, so 1900 is subtracted to
	* match the two-digit mother-level years. The survey date is 88 + 7/12.

	* mother's age when the pregnancy ended
	gen m_age_cborn = (pregyear-1900+pregmonth/12-m_year_birth-m_month_birth/12)
	label variable m_age_cborn "Mother's age at birth of child"

	* time the mother has lived at the current location, as of the survey
	gen reside_length = (88+7/12-m_year_reside-m_month_reside/12)
	label variable reside_length "How long has mother lived in current location"

	* time since the mother's migration, as of the survey
	gen reside_s_mig = (88+7/12-year_migration-month_migration/12)
	label variable reside_s_mig "How long ago did mother migrate?"

	* time from marriage to the end of the pregnancy
	gen time_child = (pregyear-1900+pregmonth/12-year_marriage-month_marriage/12)
	label variable time_child "How long after marriage child born"

	* age at menarche (whole years; months are not used)
	gen age_menarche = (year_menarche-m_year_birth)
	label variable age_menarche "Age of menarche"

	*get husband characteristics*
	* Attach the husband variables from husband.dta, keyed on the mother
	* identifier (prov a1 a2 serialno). The path uses a forward slash so it
	* resolves on Windows, macOS and Linux; a backslash only works on Windows.
	* Husbands with no matching woman record (_merge == 2) are discarded;
	* _merge itself is left in the data, as before.
	sort 		prov a1 a2 serialno
	merge 		m:1 prov a1 a2 serialno using "$d_data/husband.dta"
	drop 		if _merge == 2

	lab var		husband_han 	"Husband is Han" 
	lab var		husband_ed		"Husband's education" 
	lab var		husband_employ	"Husband employment" 

	* ---- Household-level exclusion flags ----
	* Each flag equals 1 for EVERY record of a household (group) in which at
	* least one record meets the condition, and is missing otherwise.
	* Nothing is dropped here; the flags are only combined into filter below.
	tempvar hh_any

	* any woman in the household married before age 10
	egen `hh_any' = max(age_marriage<10), by(group)
	gen early_marriage = 1 if `hh_any'==1
	drop `hh_any'

	* any record in the household with mbirth above 1 (multiple birth);
	* records with missing mbirth do not count either way
	egen `hh_any' = max(cond(mbirth!=., mbirth>1, .)), by(group)
	gen multiples = 1 if `hh_any'==1
	drop `hh_any'

	* any woman in the household with menarche after age 21;
	* records with missing age_menarche do not count either way
	egen `hh_any' = max(cond(age_menarche!=., age_menarche>21, .)), by(group)
	gen late_menarche = 1 if `hh_any'==1
	drop `hh_any'

	* any pregnancy in the household that ended before the mother was 13.
	* The age is stored in a variable first so that the comparison is made on
	* the stored value.
	gen mage_childborn = (pregyear-1900+pregmonth/12-m_year_birth-m_month_birth/12)
	egen `hh_any' = max(mage_childborn<13), by(group)
	gen early_childbirth = 1 if `hh_any'==1
	drop `hh_any' mage_childborn

	* ---- Marital status restriction ----
	* Unlike the flags above, this removes records: only records with
	* marital == 1 are kept.
	drop if marital!=1

	* ---- Combined filter ----
	* An earlier version also used collective_hhold and many_children:
	*gen filter = 1 if early_marriage==1 | collective_hhold==1 | multiples==1 | late_menarche==1 | many_children==1 | early_childbirth==1
	gen filter = 1 if early_marriage==1 | multiples==1 | late_menarche==1 | early_childbirth==1

	* ---- Woman identifier and rural indicators ----
	egen id = group(prov serialno hh group mother)
	gen rural  = inlist(1, country, town)
	gen rural2 = (country==1)

	* Put the mother's birth year on the four-digit scale from here on.
	replace m_year_birth = 1900 + m_year_birth

	capture drop order
	rename pregyear year

	* livebirth is 1 for pregresult below 3 and missing otherwise.
	gen livebirth = 1 if pregresult<3

	* Number the records chronologically (year, then month) within each woman,
	* separately for live births and for all other pregnancies. Both sequences
	* are produced by the same running count and then split into two variables.
	* NOTE(review): records of one woman with identical year and month are
	* ordered arbitrarily by sort - confirm whether a tie-breaker is needed.
	bysort livebirth id (year pregmonth): gen order_livebirth = _n
	gen order_nolivebirth = order_livebirth if missing(livebirth)
	replace order_livebirth = . if missing(livebirth)

	* Running count over all of a woman's pregnancies, whatever the outcome.
	bysort id (year pregmonth): gen order_pregnancy = _n

	label variable order_livebirth		"Birth order among livebirths"
	label variable order_nolivebirth	"Order of pregnancies not resulting in live births" 
	label variable order_pregnancy		"Pregnancy Order" 

********************************************************
* (2.0) Add Policy Vars  
********************************************************

	 ***************************************************
	 * (2.1) LLF Policy  
	 ***************************************************
	 
	 * LLF policy exposure.
	 *   LLFyear          year the policy started in the woman's province
	 *   LLF              1 if the pregnancy year is on/after LLFyear, else 0
	 *   years_since_LLF  pregnancy year minus LLFyear (negative = before)
	 * Provinces without a rollout line below keep missing values in all three.
	 cap drop LLF
	 cap drop LLFyear
	 gen LLF = .
	 gen LLFyear = .

	 * Rollout year by province code
	 replace LLFyear = 1971 if prov == 11	// Beijing
	 replace LLFyear = 1972 if prov == 12	// Tianjin
	 replace LLFyear = 1972 if prov == 13	// Hebei
	 replace LLFyear = 1973 if prov == 14	// Shanxi
	 replace LLFyear = 1979 if prov == 15	// Inner Mongolia
	 replace LLFyear = 1971 if prov == 21	// Liaoning
	 replace LLFyear = 1971 if prov == 22	// Jilin
	 replace LLFyear = 1972 if prov == 23	// Heilongjiang
	 replace LLFyear = 1973 if prov == 31	// Shanghai
	 replace LLFyear = 1970 if prov == 32	// Jiangsu
	 replace LLFyear = 1974 if prov == 34	// Anhui
	 replace LLFyear = 1973 if prov == 35	// Fujian
	 replace LLFyear = 1972 if prov == 36	// Jiangxi
	 replace LLFyear = 1972 if prov == 37	// Shandong
	 replace LLFyear = 1974 if prov == 41	// Henan
	 replace LLFyear = 1972 if prov == 42	// Hubei
	 replace LLFyear = 1974 if prov == 43	// Hunan
	 replace LLFyear = 1970 if prov == 44	// Guangdong
	 replace LLFyear = 1971 if prov == 45	// Guangxi
	 replace LLFyear = 1969 if prov == 46	// Hainan
	 replace LLFyear = 1971 if prov == 51	// Sichuan
	 replace LLFyear = 1971 if prov == 52	// Guizhou
	 replace LLFyear = 1972 if prov == 53	// Yunnan
	 replace LLFyear = 1973 if prov == 61	// Shaanxi
	 replace LLFyear = 1971 if prov == 62	// Gansu
	 replace LLFyear = 1972 if prov == 63	// Qinghai
	 replace LLFyear = 1973 if prov == 64	// Ningxia
	 replace LLFyear = 1975 if prov == 65	// Xinjiang

	 * Stata treats a missing value as larger than any number, so a bare
	 * "year >= LLFyear" would mark records with an unknown year as exposed.
	 * Those records are left missing instead, in line with years_since_LLF.
	 replace LLF = (year >= LLFyear) if !missing(year, LLFyear)

	 cap drop years_since_LLF
	 gen years_since_LLF = year - LLFyear 
	 

	 ***************************************************
	 * (2.2) Ultrasound availability 
	 ***************************************************
	 
	 * Ultrasound availability.
	 *   ultrasoundyr            year of availability in the woman's province
	 *   ultrasound              1 if the pregnancy year is on/after that year
	 *   years_since_ultrasound  pregnancy year minus ultrasoundyr
	 * Provinces without a line below keep missing values in all three.
	 * NOTE(review): codes 31 and 65 appear in the LLF list but have no
	 * ultrasound year here, and 33 appears only here - confirm intended.
	 cap drop ultrasound 
	 cap drop ultrasoundyr
	 gen ultrasound = .
	 gen ultrasoundyr = . 

	 * Availability year by province code
	 replace ultrasoundyr = 1979 if prov == 11	// Beijing
	 replace ultrasoundyr = 1983 if prov == 12	// Tianjin
	 replace ultrasoundyr = 1977 if prov == 13	// Hebei
	 replace ultrasoundyr = 1979 if prov == 14	// Shanxi
	 replace ultrasoundyr = 1977 if prov == 15	// Inner Mongolia
	 replace ultrasoundyr = 1979 if prov == 21	// Liaoning
	 replace ultrasoundyr = 1979 if prov == 22	// Jilin
	 replace ultrasoundyr = 1982 if prov == 23	// Heilongjiang
	 replace ultrasoundyr = 1976 if prov == 32	// Jiangsu
	 replace ultrasoundyr = 1982 if prov == 33	// Zhejiang
	 replace ultrasoundyr = 1979 if prov == 34	// Anhui
	 replace ultrasoundyr = 1981 if prov == 35	// Fujian
	 replace ultrasoundyr = 1979 if prov == 36	// Jiangxi
	 replace ultrasoundyr = 1979 if prov == 37	// Shandong
	 replace ultrasoundyr = 1974 if prov == 41	// Henan
	 replace ultrasoundyr = 1980 if prov == 42	// Hubei
	 replace ultrasoundyr = 1981 if prov == 43	// Hunan
	 replace ultrasoundyr = 1982 if prov == 44	// Guangdong
	 replace ultrasoundyr = 1981 if prov == 45	// Guangxi
	 replace ultrasoundyr = 1984 if prov == 46	// Hainan
	 replace ultrasoundyr = 1979 if prov == 51	// Sichuan
	 replace ultrasoundyr = 1977 if prov == 52	// Guizhou
	 replace ultrasoundyr = 1979 if prov == 53	// Yunnan
	 replace ultrasoundyr = 1981 if prov == 61	// Shaanxi
	 replace ultrasoundyr = 1977 if prov == 62	// Gansu
	 replace ultrasoundyr = 1981 if prov == 63	// Qinghai
	 replace ultrasoundyr = 1983 if prov == 64	// Ningxia

	 * Stata treats a missing value as larger than any number, so a bare
	 * "year >= ultrasoundyr" would mark records with an unknown year as
	 * exposed. Those records are left missing instead, in line with
	 * years_since_ultrasound.
	 replace ultrasound = (year >= ultrasoundyr) if !missing(year, ultrasoundyr)

	 cap drop years_since_ultrasound
	 gen years_since_ultrasound = year - ultrasoundyr 
	 
	* Within each woman, live births come first in birth order; records that
	* are not live births have a missing order_livebirth and sort to the end.
	sort id order_livebirth 

	*************************************************************
	* (2.3) Determine if there is a son
	* 			Note that cases of multiples could be messed up
	* 			from this method. But multiples are excluded from
	* 			sex selection analysis
	*************************************************************

	* son = 1 for a live birth when at least one EARLIER live birth of the
	* same woman was male and had not died by the year of the current birth.
	* A missing childdeath_year (no recorded death) compares as greater than
	* any year, so surviving boys count. First births and records that are
	* not live births stay at 0.
	*
	* With the sort above, the k-th live birth of a woman sits k-1 rows below
	* her first, so looking back by lag rows reaches an earlier live birth of
	* the same woman exactly when order_livebirth exceeds lag. The look-back
	* runs up to the highest birth order in the data, so births beyond the
	* tenth are handled too (a fixed ladder up to order 10 would leave them
	* coded as having no son).
	cap drop son 
	gen son = 0

	quietly summarize order_livebirth
	local max_lag = cond(r(N) > 0, r(max) - 1, 0)

	forvalues lag = 1/`max_lag' {
		replace son = 1 if cmale[_n-`lag'] == 1 & id[_n-`lag'] == id		///
			& order_livebirth > `lag' & order_livebirth < .				///
			& childdeath_year[_n-`lag'] > year
	}

	* order_son: the birth order for births preceded by a living son, else 0.
	cap drop order_son
		gen 	order_son = cond(son == 1, order_livebirth, 0)

	* noson: complement of son.
	cap drop noson
		gen noson =  son == 0 if son != .

	* tot_kids: highest live-birth order recorded for the woman.
	cap drop tot_kids
	bysort id: egen tot_kids = max(order_livebirth)	

	label data "China 2 per thousand fertility survey, birth level panel"
	save `"$f_2PKbirthpanel"', replace 
	 
*****************************************************
* (3.0) Prep mother-year recodes
*****************************************************

	*****************************************************
	* (3.1) Married women
	*****************************************************

	* Start from the birth-level panel and keep live births in provinces
	* with a known LLF year (years_since_LLF is missing otherwise).
	use `"$f_2PKbirthpanel"', clear
	drop if years_since_LLF == .
	keep if livebirth==1
	rename year pregyear

	* NOTE(review): mult is an abbreviated variable name, presumably for
	* multiples - confirm, and note it only resolves with varabbrev on.
	keep	id prov rural educ											///
			m_year_birth m_month_birth age_marriage year_marriage		///
			mult tot_kids son											///
			order_livebirth pregyear pregmonth cmale					///
			childdeath_year childdeath_month							///
			years_since_LLF LLFyear

	* Discard every record whose (woman, birth order) pair is not unique.
	duplicates tag id order_livebirth, gen(tag) 
	drop if tag>0

	* Marriage year onto the four-digit scale.
	replace year_marriage = year_marriage + 1900

	* Build one woman-level cross-section per reference year and stack them.
	* Each pass works on a preserved copy of the birth records.
	forvalues refyr = 1955/1982 { 
	preserve

	* did the woman deliver, or deliver a boy, in the reference year
	bysort id: egen delivered     = max(pregyear == `refyr')
	bysort id: egen delivered_son = max(pregyear == `refyr' & cmale==1)

	* children born before the reference year and not dead by it (a missing
	* death year compares as greater than any year), and whether any is a boy
	bysort id: egen living_children = total(pregyear < `refyr' & childdeath_year > `refyr')
	bysort id: egen hasson          = max(cmale == 1 & pregyear < `refyr' & childdeath_year > `refyr')

	* parity: live births before the reference year
	bysort id: egen parity = total(pregyear < `refyr')

	gen year        = `refyr'
	gen current_age = `refyr' - m_year_birth

	* year and month of a birth in the reference year (missing if none)
	bysort id: egen birthyr = max(cond(pregyear == `refyr', pregyear, .))
	bysort id: egen birthmo = max(cond(pregyear == `refyr', pregmonth, .))

	* reduce to one record per woman for this year:
	*   identifier, time-varying measures, fixed traits, place and time
	keep	id															///
			current_age delivered delivered_son hasson living_children parity	///
			m_year_birth m_month_birth educ age_marriage birthyr birthmo rural	///
			prov LLFyear year year_marriage
	duplicates drop

	gen years_since_LLF = `refyr' - LLFyear  

	* keep women married in or before the reference year and aged 49 or less
	* (a missing marriage year or age compares as large, so those are dropped)
	di "AGE RESTRICTION DROP"
	drop if year_marriage > `refyr'
	drop if current_age>49
	di "AGE RESTRICTION DROP"

	* The first year starts the panel file; each later year is stacked on
	* top of what has been saved so far, and the file is rewritten.
	if `refyr' > 1955 { 
		append using `"$f_2PKmotherpanel"'
	} 
	label data "China 2 per thousand fertility survey, mother level panel"
	save `"$f_2PKmotherpanel"', replace 

	restore
	}
	 
